home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
TeX 1995 July
/
TeX CD-ROM July 1995 (Disc 1)(Walnut Creek)(1995).ISO
/
biblio
/
bibtex
/
utils
/
lookbibtex
/
lookbibtex.dist
< prev
next >
Wrap
Text File
|
1993-03-18
|
6KB
|
221 lines
#!/usr/dist/bin/perl
#
# lookbibtex 1.22
# Look in to a bib file.
# Comments to <johnh@cs.ucla.edu>.
#
# Copyright (C) 1990 by John Heidemann
# This is distributed under the GNU Public Licence, Version 1 (Feb 89).
# See the Perl documentation for a copy of that license.
#
# 4-Oct-90 it is hacked together.
# 19-Nov-90 Now it remembers "'s and join such lines.
# It also removes nasty characters like {} from the search string.
# 20-Nov-90 Umlaut accents handled correctly.
# 28-Nov-90 A simple heuristic to handle multi-line fields with {}'s is added.
# In addition, we compress all whitespace to single spaces in the
# searched version. lookbibtex 1.1
# 4-Jan-91 Converted the -k option to -f, since field makes more sense than
# keyword. lookbibtex 1.11
# 17-Jan-91 Added -s option to pass through strings, instead of ignoring them.
# 31-May-91 ficus directory moved
# 26-Aug-91 Documentation fixed. The environment variable LOOKBIBTEXFILE
# will set the default bibtex file to use.
# lookbibtex 1.12 released, posted to anonymous ftp at cs.ucla.edu.
# 28-Aug-91 Usage string fixed at suggestion of
# Henk P. Penning <henkp@cs.ruu.nl>.
# 5-Sep-91 Argument processing re-done (now matches grep, as it always
# should have). Changes from Tim Wilson <tdw@cl.cam.ac.uk> to
# handle multiple bib files and select default bib files from BIBINPUTS.
# lookbibtex 1.2
# 29-Jan-92 Bug reported by Dana Jacobsen <jacobsd@frisby.cs.orst.edu>:
# "badkeys" are handled in a case sensitive manner. Fixed.
# lookbibtex 1.21
# 25-Feb-92 Sigh. Bug fixes always make more bugs. Bug in last fix, fixed.
# lookbibtex 1.22
#
# This program relies on the convention that the closing } of a
# bib entry is the only } in the first non-whitespace column,
# and that the opening @ is also there.
#
$* = 1; # make searches on vars with imbedded newlines work
$prog = substr($0,rindex($0,'/')+1);
$badkeys = "string"; # keys to ignore (list in lowercase only)
#
# do argument processing
#
@files = (); # files to search
$passthroughbad = 0; # -s flag
undef ($pattern); # will be set below
undef ($keyword); # may be set below
sub remember_file {
local ($file) = @_;
local ($dev, $ino) = stat ($file);
local ($key) = "$dev,$ino";
if (!defined($files{$key})) {
$files{$key} = $file;
push (@files, $file);
};
# warn ("file $file ($key) remembered.\n");
};
while ($#ARGV >= 0) {
if ($ARGV[0] eq "-s") {
$passthroughbad = 1;
} elsif ($ARGV[0] eq "-f" && $#ARGV >= 1) {
$keyword = $ARGV[1];
shift (@ARGV);
} elsif (defined($pattern)) {
&remember_file ($ARGV[0]);
} else {
$pattern = $ARGV[0];
};
shift (@ARGV);
};
if (!defined($pattern)) {
die ("Usage: $prog [-s] [-f field] regexp [bibfile.bib ...]\n" .
" Fields restricts the regexp search to that bibtex " .
"field entry (author, etc.)\n" .
" Default bibfile is $defaultfile, - indicates stdin.\n" .
" Regexp is a Perl regexp.\n");
};
#
# handle the keyword by modifying the pattern
#
if (defined($keyword)) {
$pattern = "^\\s*${keyword}\\s*=.*${pattern}";
# print "pattern is $pattern\n";
};
#
# Handle choosing default bib files:
# Select anything from BIBINPUTS.
#
if ($#files == -1) {
$searchpath = ($ENV{'BIBINPUTS'} || ".");
foreach $dir (split(/:/, $searchpath)) {
opendir(DIR, $dir) || do {
warn "$prog: Can't open directory `$dir', skipping\n";
next;
};
foreach $file (grep(/\.bib$/, readdir(DIR))) {
&remember_file ($dir . "/" . $file);
};
closedir(DIR);
};
};
die ("$prog: no files on command line or in BIBINPUTS\n") if ($#files == -1);
$manyfiles = ($#files > 0); # remember if to show filenames or not
#
# Certain keys we really want to ignore because
# they're not bib entries. They're listed here.
#
@badkeys = split(/,/, $badkeys);
foreach $i (@badkeys) {
$badkeys{$i} = "bad"; # just make them defined
};
#
# To do searches right, we have to make everything
# for a field on one line.
# This routine does that, and also gets rid of {}'s
# which tend to get in the way for searches. In the
# same vein, it collapses all whitespace to single spaces.
#
# To know when to join lines, we use two simple heuristics:
# is there are a odd number of "'s on a line, we must enter or exit
# multi-line mode. If there are more {'s than }'s, we must enter,
# and if there are more }'s than {'s we must exit (anything on
# the first line is ignored).
#
sub printtosearch {
local ($print) = @_;
local ($search, $mode) = ("", 1);
local ($opencurley, $closecurley) = (0,0);
@lines = split(/\n/, $print);
@lines[0] =~ s/{/ /;
foreach $ln (@lines) {
# remove and count curley brackets
$opencurley = ($ln =~ s/[{]//g);
$closecurley = ($ln =~ s/[}]//g);
if ($opencurley-$closecurley < 0) {
$mode = 1;
} elsif ($opencurley-$closecurley > 0) {
$mode = 0;
} else {
# remove umlauts so quote handling works,
# and then change modes if required.
$ln =~ s/\\"//g;
$mode = !$mode if (($ln =~ tr/"/"/) % 2 == 1);
};
$search .= $ln;
$search .= "\n" if ($mode);
};
$search =~ s/[ \t]+/ /g;
return $search;
}
#
# looking for beginning of bib entry is state 1, in bib is state 2
#
$LOOKING = 1; $INBIB = 2;
foreach $file (@files) {
open (INF, "<$file") || warn ("cannot open bibfile $file\n");
$state = $LOOKING;
while (<INF>) {
# print "line ", $i++, " state=$state: " . "$_\n";
# beware RCS munging $state:...$
if ($state == $LOOKING) {
if (/^[ \t]*@(\w+)/) { # beginning of entry
($key = $1) =~ tr/A-Z/a-z/;
# case insensitive keywords
if (! defined($badkeys{$key})) {
$state = $INBIB;
$bibentry = $_;
} elsif ($passthroughbad) {
print "$_"; # a hack for @string
};
};
} elsif ($state == $INBIB) {
$bibentry .= $_;
if (/^[ \t]*}/) { # ending
$searchentry = &printtosearch($bibentry);
if ($searchentry =~ /$pattern/i) {
print "$file:\n" if ($manyfiles);
print "$bibentry\n";
};
$state = $LOOKING;
}
} else {
die ("state problem, $state\n");
};
};
};